In [1]:
import pandas as pd 
import numpy as np
import xgboost
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix,precision_recall_fscore_support, auc, f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn import preprocessing
from skopt import gp_minimize
from skopt.space import Real, Integer, Dimension,Categorical
from functools import partial

import plotly.express as px
import shap
import warnings
warnings.filterwarnings('ignore')
In [2]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
train.drop(train.columns[0], axis=1, inplace=True)
test.drop(test.columns[0], axis=1, inplace=True)
In [3]:
X_train = train[train.columns[:-1]]
y_train = train[train.columns[-1]]
X_test = test[test.columns[:-1]] 
y_test = test[test.columns[-1]]
In [4]:
model = XGBClassifier(n_jobs = 6, early_stopping_rounds = 10)
In [5]:
model.fit(X_train, y_train, verbose =2)
Out[5]:
XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, early_stopping_rounds=10,
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints=None, learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints=None, n_estimators=100, n_jobs=6,
              num_parallel_tree=1, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method=None, validate_parameters=False, verbosity=None)
In [6]:
indices = np.argsort(model.feature_importances_)[::-1]
for idx, i in enumerate(indices):
    #print("{}.\t{} - {}".format(idx, X_train.columns[i], model.feature_importances_[i]))
    if(model.feature_importances_[i] == 0.0):
        train.drop(X_train.columns[i], axis=1, inplace=True)
        test.drop(X_train.columns[i], axis=1, inplace=True)
In [7]:
model.fit(X_train, y_train, verbose =3)
Out[7]:
XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, early_stopping_rounds=10,
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints=None, learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints=None, n_estimators=100, n_jobs=6,
              num_parallel_tree=1, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method=None, validate_parameters=False, verbosity=None)
In [8]:
xgboost.plot_importance(model)
#plt.title("xgboost.plot_importance(model)")
#plt.show()
#plt.savefig('xg_mi.png', dpi = 300)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x13b23dd68>
In [9]:
xgboost.to_graphviz(model, num_trees=2)
Out[9]:
%3 0 fBodyAcc-skewness()-X<-0.376263261 1 tGravityAcc-mean()-Y<-0.086602971 0->1 yes, missing 2 tBodyAcc-correlation()-X,Z<0.343385875 0->2 no 3 fBodyAccJerk-bandsEnergy()-9,16.2<-0.974300802 1->3 yes, missing 4 tGravityAcc-mean()-X<0.944243312 1->4 no 5 fBodyAcc-max()-X<-0.99416101 2->5 yes, missing 6 tGravityAcc-max()-Y<-0.179676294 2->6 no 7 tBodyGyro-min()-X<0.845551968 3->7 yes, missing 8 leaf=-0.17578125 3->8 no 9 leaf=-0.179130912 4->9 yes, missing 10 subject<28 4->10 no 15 tGravityAcc-mean()-X<0.255770355 7->15 yes, missing 16 fBodyGyro-bandsEnergy()-57,64.1<-0.999990702 7->16 no 27 leaf=-0.124137938 15->27 yes, missing 28 tBodyGyro-correlation()-X,Y<0.689952135 15->28 no 29 leaf=0.471428543 16->29 yes, missing 30 tBodyAcc-std()-X<-0.998380184 16->30 no 43 leaf=0.841923714 28->43 yes, missing 44 leaf=0.155172408 28->44 no 45 leaf=0.0473684184 30->45 yes, missing 46 leaf=-0.140963852 30->46 no 17 tGravityAcc-energy()-Y<-0.999835789 10->17 yes, missing 18 leaf=0.841726601 10->18 no 31 fBodyGyro-mean()-X<-0.987600327 17->31 yes, missing 32 tBodyGyro-correlation()-Y,Z<0.440319538 17->32 no 47 leaf=0.153191477 31->47 yes, missing 48 leaf=0.81382972 31->48 no 49 leaf=-0.0759740323 32->49 yes, missing 50 leaf=0.5625 32->50 no 11 tGravityAcc-min()-Y<0.0906088054 5->11 yes, missing 12 fBodyAcc-max()-X<-0.833744586 5->12 no 13 fBodyGyro-bandsEnergy()-25,32<-0.998062611 6->13 yes, missing 14 fBodyAcc-kurtosis()-Z<0.178477585 6->14 no 19 tBodyGyroJerk-mad()-X<-0.996376157 11->19 yes, missing 20 leaf=-0.174671054 11->20 no 21 tGravityAcc-mean()-Y<-0.0737660378 12->21 yes, missing 22 leaf=-0.179794908 12->22 no 33 tBodyAcc-energy()-X<-0.999963164 19->33 yes, missing 34 tGravityAcc-mean()-X<0.106364369 19->34 no 51 leaf=-0.148543686 33->51 yes, missing 52 leaf=0.189473689 33->52 no 53 leaf=-0.112500004 34->53 yes, missing 54 leaf=0.713793039 34->54 no 35 tGravityAcc-mean()-X<0.262532949 21->35 yes, missing 36 tBodyGyro-mad()-Z<-0.74562192 21->36 no 55 leaf=-0.169480518 35->55 yes, missing 56 leaf=0.717336178 35->56 no 57 leaf=-0.176313296 36->57 yes, missing 58 leaf=0.214285716 36->58 no 23 leaf=0.869318128 13->23 yes, missing 24 tBodyAcc-mean()-Y<0.0011895108 13->24 no 25 tBodyAcc-min()-X<0.85094142 14->25 yes, missing 26 tBodyGyro-max()-Z<-0.701174736 14->26 no 37 leaf=-0.151327431 24->37 yes, missing 38 leaf=0.0473684184 24->38 no 39 fBodyBodyAccJerkMag-std()<-0.99566257 25->39 yes, missing 40 leaf=0.33157894 25->40 no 41 tBodyAcc-mean()-Z<-0.151957244 26->41 yes, missing 42 leaf=-0.145161286 26->42 no 59 leaf=0.155172408 39->59 yes, missing 60 leaf=-0.169104666 39->60 no 61 leaf=-0.0428571478 41->61 yes, missing 62 leaf=0.579661012 41->62 no
In [10]:
y_pred = model.predict(X_test)
y_pred_prop = model.predict_proba(X_test)
predictions=y_pred.copy()
conf= confusion_matrix(y_test, predictions)
Labels=list(train.Activity.unique())
pd.DataFrame(conf,columns=Labels, index=Labels)
Out[10]:
STANDING SITTING LAYING WALKING WALKING_DOWNSTAIRS WALKING_UPSTAIRS
STANDING 537 0 0 0 0 0
SITTING 0 411 77 0 0 3
LAYING 0 29 503 0 0 0
WALKING 0 0 0 487 5 4
WALKING_DOWNSTAIRS 0 0 0 10 384 26
WALKING_UPSTAIRS 0 0 0 30 5 436
In [11]:
pr_rec_f_supp=precision_recall_fscore_support(y_test, predictions)
DF_report=pd.DataFrame({'Precision':list(pr_rec_f_supp[0]),
                        'Recall':list(pr_rec_f_supp[1]),
                        'F-Score':list(pr_rec_f_supp[2]),
                        'Support':list(pr_rec_f_supp[3])}, index=Labels)
DF_report
Out[11]:
Precision Recall F-Score Support
STANDING 1.000000 1.000000 1.000000 537
SITTING 0.934091 0.837067 0.882922 491
LAYING 0.867241 0.945489 0.904676 532
WALKING 0.924099 0.981855 0.952102 496
WALKING_DOWNSTAIRS 0.974619 0.914286 0.943489 420
WALKING_UPSTAIRS 0.929638 0.925690 0.927660 471
In [12]:
X_test_new = X_test[::5]
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test_new)
print(len(explainer.expected_value))
print(len(shap_values))
6
6
In [13]:
shap.summary_plot(shap_values[0], X_test_new)
In [14]:
shap.summary_plot(shap_values, X_test)
In [15]:
model = OneVsRestClassifier(XGBClassifier(n_jobs = 6, early_stopping_rounds = 10))
In [16]:
y_train_bin = preprocessing.label_binarize(y_train, classes=[0, 1, 2, 3])
parameters = {'estimator__nthread':[6], #when use hyperthread, xgboost may become slower
              'estimator__objective':['multi:softprob'],
              'estimator__learning_rate': [0.05,0.1,0.2,0.3,0.01], #so called `eta` value
              'estimator__max_depth': [6,5,4,3,2,1],
              'estimator__min_child_weight': [11,12,13,14,15,16],
              'estimator__subsample': [0.8],
              'estimator__colsample_bytree': [0.7],
              'estimator__n_estimators': [5,100], #number of trees, change it to 1000 for better results
             }


clf = GridSearchCV(model, parameters, n_jobs=5,scoring = 'f1_samples',verbose=1, refit=True, return_train_score = True)
In [17]:
clf.fit(X_train, y_train_bin)
Fitting 5 folds for each of 360 candidates, totalling 1800 fits
[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  45 tasks      | elapsed:    2.2s
[Parallel(n_jobs=5)]: Done 635 tasks      | elapsed:    9.4s
[Parallel(n_jobs=5)]: Done 1635 tasks      | elapsed:   21.9s
[Parallel(n_jobs=5)]: Done 1791 out of 1800 | elapsed:   23.6s remaining:    0.1s
[Parallel(n_jobs=5)]: Done 1800 out of 1800 | elapsed:   23.8s finished
Out[17]:
GridSearchCV(cv=None, error_score=nan,
             estimator=OneVsRestClassifier(estimator=XGBClassifier(base_score=None,
                                                                   booster=None,
                                                                   colsample_bylevel=None,
                                                                   colsample_bynode=None,
                                                                   colsample_bytree=None,
                                                                   early_stopping_rounds=10,
                                                                   gamma=None,
                                                                   gpu_id=None,
                                                                   importance_type='gain',
                                                                   interaction_constraints=None,
                                                                   learning_rate=None,
                                                                   max_delta_step=None,
                                                                   max_depth=None,
                                                                   min_child...
                         'estimator__learning_rate': [0.05, 0.1, 0.2, 0.3,
                                                      0.01],
                         'estimator__max_depth': [6, 5, 4, 3, 2, 1],
                         'estimator__min_child_weight': [11, 12, 13, 14, 15,
                                                         16],
                         'estimator__n_estimators': [5, 100],
                         'estimator__nthread': [6],
                         'estimator__objective': ['multi:softprob'],
                         'estimator__subsample': [0.8]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='f1_samples', verbose=1)
In [18]:
print(getattr(clf, 'best_estimator_', None))
OneVsRestClassifier(estimator=XGBClassifier(base_score=None, booster=None,
                                            colsample_bylevel=None,
                                            colsample_bynode=None,
                                            colsample_bytree=0.7,
                                            early_stopping_rounds=10,
                                            gamma=None, gpu_id=None,
                                            importance_type='gain',
                                            interaction_constraints=None,
                                            learning_rate=0.05,
                                            max_delta_step=None, max_depth=6,
                                            min_child_weight=11, missing=nan,
                                            monotone_constraints=None,
                                            n_estimators=5, n_jobs=6, nthread=6,
                                            num_parallel_tree=None,
                                            objective='multi:softprob',
                                            random_state=None, reg_alpha=None,
                                            reg_lambda=None,
                                            scale_pos_weight=None,
                                            subsample=0.8, tree_method=None,
                                            validate_parameters=False,
                                            verbosity=None),
                    n_jobs=None)
In [97]:
space = [
    Real(0.6, 0.7, name="colsample_bylevel"),
    Real(0.6, 0.7, name="colsample_bytree"),
    Real(0.01, 1, name="gamma"),
    Real(0.0001, 1, name="learning_rate"),
    Real(0.1, 10, name="max_delta_step"),
    Integer(6, 15, name="max_depth"),
    Real(10, 500, name="min_child_weight"),
    Integer(10, 100, name="n_estimators"),
    Real(0.1, 100, name="reg_alpha"),
    Real(0.1, 100, name="reg_lambda"),
    Real(0.4, 0.7, name="subsample"),
    Categorical(('multi:softprob',))
]
In [108]:
def return_model_assessment(args, X_train, y_train, X_test):
    global models, train_scores, test_scores, curr_model_hyper_params
    params = {curr_model_hyper_params[i]: args[i] for i, j in enumerate(curr_model_hyper_params)}
    model = XGBClassifier(random_state=42, seed=42)
    model.set_params(**params)
    fitted_model = model.fit(X_train, y_train, sample_weight=None)
    models.append(fitted_model)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    train_score = f1_score(train_predictions, y_train , average = 'macro')
    test_score = f1_score(test_predictions, y_test, average = 'macro')
    train_scores.append(train_score)
    test_scores.append(test_score)
    return 1 - test_score
In [109]:
models = []
train_scores = []
test_scores = []
curr_model_hyper_params = ['colsample_bylevel', 'colsample_bytree', 'gamma', 'learning_rate', 'max_delta_step',
                        'max_depth', 'min_child_weight', 'n_estimators', 'reg_alpha', 'reg_lambda', 'subsample']
objective_function = partial(return_model_assessment, X_train=X_train, y_train=y_train, X_test=X_test)

# running the algorithm
n_calls = 50 # number of times you want to train your model
results = gp_minimize(objective_function, space, base_estimator=None, n_calls=50, n_random_starts=n_calls-1, random_state=42, verbose = True, n_jobs =3)
Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 8.0201
Function value obtained: 0.1116
Current minimum: 0.1116
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 23.1906
Function value obtained: 0.1014
Current minimum: 0.1014
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 8.8855
Function value obtained: 0.1419
Current minimum: 0.1014
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 12.2287
Function value obtained: 0.0906
Current minimum: 0.0906
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 9.1672
Function value obtained: 0.1132
Current minimum: 0.0906
Iteration No: 6 started. Evaluating function at random point.
Iteration No: 6 ended. Evaluation done at random point.
Time taken: 6.0315
Function value obtained: 0.1165
Current minimum: 0.0906
Iteration No: 7 started. Evaluating function at random point.
Iteration No: 7 ended. Evaluation done at random point.
Time taken: 15.7261
Function value obtained: 0.1719
Current minimum: 0.0906
Iteration No: 8 started. Evaluating function at random point.
Iteration No: 8 ended. Evaluation done at random point.
Time taken: 14.5699
Function value obtained: 0.6410
Current minimum: 0.0906
Iteration No: 9 started. Evaluating function at random point.
Iteration No: 9 ended. Evaluation done at random point.
Time taken: 16.8205
Function value obtained: 0.1102
Current minimum: 0.0906
Iteration No: 10 started. Evaluating function at random point.
Iteration No: 10 ended. Evaluation done at random point.
Time taken: 19.1054
Function value obtained: 0.0914
Current minimum: 0.0906
Iteration No: 11 started. Evaluating function at random point.
Iteration No: 11 ended. Evaluation done at random point.
Time taken: 10.2773
Function value obtained: 0.1077
Current minimum: 0.0906
Iteration No: 12 started. Evaluating function at random point.
Iteration No: 12 ended. Evaluation done at random point.
Time taken: 18.1536
Function value obtained: 0.1051
Current minimum: 0.0906
Iteration No: 13 started. Evaluating function at random point.
Iteration No: 13 ended. Evaluation done at random point.
Time taken: 16.2757
Function value obtained: 0.1183
Current minimum: 0.0906
Iteration No: 14 started. Evaluating function at random point.
Iteration No: 14 ended. Evaluation done at random point.
Time taken: 17.7345
Function value obtained: 0.1210
Current minimum: 0.0906
Iteration No: 15 started. Evaluating function at random point.
Iteration No: 15 ended. Evaluation done at random point.
Time taken: 27.4055
Function value obtained: 0.1004
Current minimum: 0.0906
Iteration No: 16 started. Evaluating function at random point.
Iteration No: 16 ended. Evaluation done at random point.
Time taken: 9.9089
Function value obtained: 0.1429
Current minimum: 0.0906
Iteration No: 17 started. Evaluating function at random point.
Iteration No: 17 ended. Evaluation done at random point.
Time taken: 22.1312
Function value obtained: 0.1223
Current minimum: 0.0906
Iteration No: 18 started. Evaluating function at random point.
Iteration No: 18 ended. Evaluation done at random point.
Time taken: 10.7469
Function value obtained: 0.1275
Current minimum: 0.0906
Iteration No: 19 started. Evaluating function at random point.
Iteration No: 19 ended. Evaluation done at random point.
Time taken: 13.2454
Function value obtained: 0.1165
Current minimum: 0.0906
Iteration No: 20 started. Evaluating function at random point.
Iteration No: 20 ended. Evaluation done at random point.
Time taken: 13.9841
Function value obtained: 0.1278
Current minimum: 0.0906
Iteration No: 21 started. Evaluating function at random point.
Iteration No: 21 ended. Evaluation done at random point.
Time taken: 13.4655
Function value obtained: 0.1288
Current minimum: 0.0906
Iteration No: 22 started. Evaluating function at random point.
Iteration No: 22 ended. Evaluation done at random point.
Time taken: 11.5678
Function value obtained: 0.1172
Current minimum: 0.0906
Iteration No: 23 started. Evaluating function at random point.
Iteration No: 23 ended. Evaluation done at random point.
Time taken: 7.4741
Function value obtained: 0.1296
Current minimum: 0.0906
Iteration No: 24 started. Evaluating function at random point.
Iteration No: 24 ended. Evaluation done at random point.
Time taken: 12.1041
Function value obtained: 0.1360
Current minimum: 0.0906
Iteration No: 25 started. Evaluating function at random point.
Iteration No: 25 ended. Evaluation done at random point.
Time taken: 34.0576
Function value obtained: 0.1263
Current minimum: 0.0906
Iteration No: 26 started. Evaluating function at random point.
Iteration No: 26 ended. Evaluation done at random point.
Time taken: 4.7613
Function value obtained: 0.1423
Current minimum: 0.0906
Iteration No: 27 started. Evaluating function at random point.
Iteration No: 27 ended. Evaluation done at random point.
Time taken: 7.1513
Function value obtained: 0.1141
Current minimum: 0.0906
Iteration No: 28 started. Evaluating function at random point.
Iteration No: 28 ended. Evaluation done at random point.
Time taken: 6.8681
Function value obtained: 0.1059
Current minimum: 0.0906
Iteration No: 29 started. Evaluating function at random point.
Iteration No: 29 ended. Evaluation done at random point.
Time taken: 13.2302
Function value obtained: 0.1155
Current minimum: 0.0906
Iteration No: 30 started. Evaluating function at random point.
Iteration No: 30 ended. Evaluation done at random point.
Time taken: 18.0704
Function value obtained: 0.1359
Current minimum: 0.0906
Iteration No: 31 started. Evaluating function at random point.
Iteration No: 31 ended. Evaluation done at random point.
Time taken: 5.7770
Function value obtained: 0.1488
Current minimum: 0.0906
Iteration No: 32 started. Evaluating function at random point.
Iteration No: 32 ended. Evaluation done at random point.
Time taken: 18.1884
Function value obtained: 0.1075
Current minimum: 0.0906
Iteration No: 33 started. Evaluating function at random point.
Iteration No: 33 ended. Evaluation done at random point.
Time taken: 5.9448
Function value obtained: 0.1155
Current minimum: 0.0906
Iteration No: 34 started. Evaluating function at random point.
Iteration No: 34 ended. Evaluation done at random point.
Time taken: 9.8887
Function value obtained: 0.1165
Current minimum: 0.0906
Iteration No: 35 started. Evaluating function at random point.
Iteration No: 35 ended. Evaluation done at random point.
Time taken: 7.5617
Function value obtained: 0.1167
Current minimum: 0.0906
Iteration No: 36 started. Evaluating function at random point.
Iteration No: 36 ended. Evaluation done at random point.
Time taken: 12.0753
Function value obtained: 0.1423
Current minimum: 0.0906
Iteration No: 37 started. Evaluating function at random point.
Iteration No: 37 ended. Evaluation done at random point.
Time taken: 25.2535
Function value obtained: 0.0917
Current minimum: 0.0906
Iteration No: 38 started. Evaluating function at random point.
Iteration No: 38 ended. Evaluation done at random point.
Time taken: 16.9894
Function value obtained: 0.1131
Current minimum: 0.0906
Iteration No: 39 started. Evaluating function at random point.
Iteration No: 39 ended. Evaluation done at random point.
Time taken: 5.7443
Function value obtained: 0.1821
Current minimum: 0.0906
Iteration No: 40 started. Evaluating function at random point.
Iteration No: 40 ended. Evaluation done at random point.
Time taken: 14.5373
Function value obtained: 0.7683
Current minimum: 0.0906
Iteration No: 41 started. Evaluating function at random point.
Iteration No: 41 ended. Evaluation done at random point.
Time taken: 14.2276
Function value obtained: 0.6363
Current minimum: 0.0906
Iteration No: 42 started. Evaluating function at random point.
Iteration No: 42 ended. Evaluation done at random point.
Time taken: 14.2562
Function value obtained: 0.0940
Current minimum: 0.0906
Iteration No: 43 started. Evaluating function at random point.
Iteration No: 43 ended. Evaluation done at random point.
Time taken: 3.1098
Function value obtained: 0.7057
Current minimum: 0.0906
Iteration No: 44 started. Evaluating function at random point.
Iteration No: 44 ended. Evaluation done at random point.
Time taken: 10.3044
Function value obtained: 0.4073
Current minimum: 0.0906
Iteration No: 45 started. Evaluating function at random point.
Iteration No: 45 ended. Evaluation done at random point.
Time taken: 12.5992
Function value obtained: 0.2004
Current minimum: 0.0906
Iteration No: 46 started. Evaluating function at random point.
Iteration No: 46 ended. Evaluation done at random point.
Time taken: 19.0701
Function value obtained: 0.1077
Current minimum: 0.0906
Iteration No: 47 started. Evaluating function at random point.
Iteration No: 47 ended. Evaluation done at random point.
Time taken: 14.1116
Function value obtained: 0.1224
Current minimum: 0.0906
Iteration No: 48 started. Evaluating function at random point.
Iteration No: 48 ended. Evaluation done at random point.
Time taken: 27.0236
Function value obtained: 0.1108
Current minimum: 0.0906
Iteration No: 49 started. Evaluating function at random point.
Iteration No: 49 ended. Evaluation done at random point.
Time taken: 14.0580
Function value obtained: 0.1403
Current minimum: 0.0906
Iteration No: 50 started. Searching for the next optimal point.
Iteration No: 50 ended. Search finished for the next optimal point.
Time taken: 38.0233
Function value obtained: 0.4607
Current minimum: 0.0906
In [110]:
metrics = pd.DataFrame(train_scores + test_scores)
metrics.loc[:,'dataset'] = ["train_score"]*n_calls + ["test_score"]*n_calls
metrics.loc[:,'Iteration Number'] = list(range(1,n_calls+1)) + list(range(1,n_calls+1))
metrics.columns = ["F1 Score", "dataset", "Iteration Number"]
fig = px.line(metrics, x="Iteration Number", y="F1 Score", color="dataset")
fig.show()
In [19]:
model_best = XGBClassifier(base_score=None, booster=None,
                                            colsample_bylevel=None,
                                            colsample_bynode=None,
                                            colsample_bytree=0.7,
                                            early_stopping_rounds=10,
                                            gamma=None, gpu_id=None,
                                            importance_type='gain',
                                            interaction_constraints=None,
                                            learning_rate=0.05,
                                            max_delta_step=None, max_depth=6,
                                            min_child_weight=11,
                                            monotone_constraints=None,
                                            n_estimators=5, n_jobs=6, nthread=6,
                                            num_parallel_tree=None,
                                            objective='multi:softprob',
                                            random_state=None, reg_alpha=None,
                                            reg_lambda=None,
                                            scale_pos_weight=None,
                                            subsample=0.8, tree_method=None,
                                            validate_parameters=False,
                                            verbosity=None)
                        
In [20]:
model_best.fit(X_train, y_train, verbose =3)
Out[20]:
XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.7,
              early_stopping_rounds=10, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints=None,
              learning_rate=0.05, max_delta_step=0, max_depth=6,
              min_child_weight=11, missing=nan, monotone_constraints=None,
              n_estimators=5, n_jobs=6, nthread=6, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=0.8,
              tree_method=None, validate_parameters=False, verbosity=None)
In [21]:
X_test_new = X_test[::5]
explainer = shap.TreeExplainer(model_best)
shap_values = explainer.shap_values(X_test_new)
print(len(explainer.expected_value))
print(len(shap_values))
6
6
In [22]:
shap.summary_plot(shap_values[0], X_test_new)
In [23]:
shap.summary_plot(shap_values[1], X_test_new)
In [24]:
shap.summary_plot(shap_values[2], X_test_new)
In [25]:
shap.summary_plot(shap_values[3], X_test_new)
In [26]:
shap.summary_plot(shap_values[4], X_test_new)
In [27]:
shap.summary_plot(shap_values[5], X_test_new)
In [28]:
y_pred = model_best.predict(X_test)
y_pred_prop = model_best.predict_proba(X_test)
predictions2=y_pred.copy()
conf= confusion_matrix(y_test, predictions)
Labels=list(train.Activity.unique())
pd.DataFrame(conf,columns=Labels, index=Labels)
Out[28]:
STANDING SITTING LAYING WALKING WALKING_DOWNSTAIRS WALKING_UPSTAIRS
STANDING 537 0 0 0 0 0
SITTING 0 411 77 0 0 3
LAYING 0 29 503 0 0 0
WALKING 0 0 0 487 5 4
WALKING_DOWNSTAIRS 0 0 0 10 384 26
WALKING_UPSTAIRS 0 0 0 30 5 436
In [29]:
pr_rec_f_supp=precision_recall_fscore_support(y_test, predictions2)
DF_report=pd.DataFrame({'Precision':list(pr_rec_f_supp[0]),
                        'Recall':list(pr_rec_f_supp[1]),
                        'F-Score':list(pr_rec_f_supp[2]),
                        'Support':list(pr_rec_f_supp[3])}, index=Labels)
DF_report
Out[29]:
Precision Recall F-Score Support
STANDING 1.000000 1.000000 1.000000 537
SITTING 0.824847 0.824847 0.824847 491
LAYING 0.844402 0.836466 0.840415 532
WALKING 0.897338 0.951613 0.923679 496
WALKING_DOWNSTAIRS 0.900000 0.878571 0.889157 420
WALKING_UPSTAIRS 0.892544 0.864119 0.878101 471
In [30]:
shap.initjs()
shap.force_plot(explainer.expected_value[1], shap_values[1])
Out[30]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [53]:
shap.decision_plot(explainer.expected_value[0], shap_values[0], feature_display_range=slice(None, -4, -1),ignore_warnings=True)
In [62]:
shap.decision_plot(explainer.expected_value[1], shap_values[1], feature_display_range=slice(None, -44, -1),ignore_warnings=True)
In [60]:
shap.decision_plot(explainer.expected_value[2], shap_values[2], feature_display_range=slice(None, -48, -1),ignore_warnings=True)
In [59]:
shap.decision_plot(explainer.expected_value[3], shap_values[3], feature_display_range=slice(None, -47, -1),ignore_warnings=True)
In [58]:
shap.decision_plot(explainer.expected_value[4], shap_values[4], feature_display_range=slice(None, -48, -1),ignore_warnings=True)
In [46]:
shap.decision_plot(explainer.expected_value[5], shap_values[5], feature_display_range=slice(None, -55, -1),ignore_warnings=True)
In [39]:
shap.summary_plot(shap_values, X_test)